#!/usr/bin/env python3
'''
Written by dcz in 2022.
License: AGPLv3 https://www.gnu.org/licenses/agpl-3.0.en.html
All edits turn public domain or CC0 after 10 years.
'''
"""
Takes JSON from SimpleTabGroups backup, spits out image and stats.

See blog at dcz_self.gitlab.com.
"""
from collections import Counter
import datetime
from glob import glob
import json
import os
import re
from urllib.parse import urlparse

from PIL import Image, ImageDraw

# Mask used to emulate fixed-width 64-bit arithmetic in xorshift().
_MASK64 = 0xFFFFFFFFFFFFFFFF


def flat_map(f, xs):
    """Apply *f* to every element of *xs* and concatenate the resulting lists."""
    ys = []
    for x in xs:
        ys.extend(f(x))
    return ys


def load_dir(path):
    """Yield (date, parsed_backup) pairs for each STG backup file in *path*.

    Files are named ``stg-backup-<ISO date>@drive4ik.json``; sorting the
    filenames therefore sorts the snapshots chronologically.
    """
    files = sorted(glob(path + '/stg-backup-*@drive4ik.json'))

    def load(p):
        with open(p) as f:
            return json.load(f)

    def when(p):
        name = os.path.basename(p)
        # Raw string + escaped dot: the original pattern let '.' match anything.
        date = re.search(r'stg-backup-(.*)@drive4ik\.json', name).group(1)
        return datetime.date.fromisoformat(date)

    yield from ((when(p), load(p)) for p in files)


def just_group(snap, group_name):
    """Return (date, tabs) for the group titled *group_name*, or (date, [])."""
    d, snap = snap
    for group in snap['groups']:
        if group['title'] == group_name:
            return d, group['tabs']
    return d, []


def just_url(snap):
    """Reduce (date, tab_dicts) to (date, urls)."""
    date, snap = snap
    return date, [tab['url'] for tab in snap]


def most_tabs(snaps):
    """Return (tab_count, date) of the snapshot with the most tabs."""
    return max((len(tabs), date) for date, tabs in snaps)


def count_days(tabs):
    """Number of days between the first and the last snapshot."""
    return (tabs[-1][0] - tabs[0][0]).days


def _canvas(tabs):
    """Create a white days×max_tabs image and its drawing context.

    Shared setup for draw(), draw_layers() and draw_hash().
    """
    img = Image.new('RGB', (count_days(tabs), most_tabs(tabs)[0]), 'white')
    return img, ImageDraw.Draw(img)


def draw(tabs):
    """Render one column per snapshot; column height = number of open tabs."""
    first_day = tabs[0][0]
    img, canvas = _canvas(tabs)
    for date, urls in tabs:
        x = (date - first_day).days
        canvas.rectangle(
            ((x, 0), (x + 1, len(urls))),
            fill=color(x)
        )
    return img


def color(offset):
    """Map a day offset onto a smooth RGB gradient.

    Walks red → yellow → green → cyan → white-ish as *offset* grows.
    The original implementation returned None past 640 days (offset*2 >= 1280),
    which PIL rejects as a fill color; clamp to white instead.
    """
    offset = offset * 2
    if offset < 256:
        return (offset, 0, 0)
    if offset < 256 * 2:
        return (255, offset % 256, 0)
    if offset < 256 * 3:
        return (255 - offset % 256, 255, 0)
    if offset < 256 * 4:
        return (0, 255, offset % 256)
    if offset < 256 * 5:
        # Was `offset % 255` — a typo; every other branch ramps with % 256,
        # and % 255 wraps mid-ramp producing a discontinuity.
        return (offset % 256, 255, 255 - offset % 256)
    return (255, 255, 255)


def xorshift(state):
    """One round of a 64-bit xorshift PRNG (Marsaglia's 13/17/5 variant).

    Python ints are arbitrary-width and hash() may be negative, so each
    left shift is masked to 64 bits to emulate the fixed-width original.
    """
    state &= _MASK64
    state ^= (state << 13) & _MASK64
    state ^= state >> 17
    state ^= (state << 5) & _MASK64
    return state


def hashcolor(obj):
    """Derive a stable-looking pseudo-random RGB color from hash(obj).

    NOTE: hash() of strings is salted per interpreter run (PYTHONHASHSEED),
    so colors differ between runs unless the seed is fixed.
    """
    h = xorshift(hash(obj))
    return (h & 0xff, (h >> 8) & 0xff, (h >> 16) & 0xff)


def first_seen(snaps):
    """Map each URL to the date of the snapshot in which it first appeared."""
    seen = {}
    for date, tabs in snaps:
        for url in tabs:
            if url not in seen:
                seen[url] = date
    return seen


def draw_layers(tabs, seen, color=color):
    """Render each tab as one pixel, colored by when its URL was first seen."""
    first_day = tabs[0][0]
    img, canvas = _canvas(tabs)
    for date, urls in tabs:
        x = (date - first_day).days
        for y, url in enumerate(urls):
            seen_offset = (seen[url] - first_day).days
            canvas.point((x, y), fill=color(seen_offset))
    return img


def draw_hash(tabs):
    """Render each tab as one pixel with a color hashed from its URL."""
    first_day = tabs[0][0]
    img, canvas = _canvas(tabs)
    for date, urls in tabs:
        x = (date - first_day).days
        for y, url in enumerate(urls):
            canvas.point((x, y), fill=hashcolor(url))
    return img


def domains(snap):
    """Reduce (date, urls) to (date, domains)."""
    d, s = snap
    return d, [urlparse(url).netloc for url in s]


def find_domains(snaps):
    """Collect the unique domains across all snapshots.

    Returns a dict mapping each domain to itself (used as an ordered set;
    callers only take len()).
    """
    ret = {}
    for _date, tabs in snaps:
        for dom in tabs:
            if dom not in ret:
                ret[dom] = dom
    return ret


def stuck_one(snap):
    """The bottom-most (oldest-position) tab of a snapshot.

    NOTE(review): raises IndexError on a snapshot with zero tabs — as did
    the original; callers feed it groups that are assumed non-empty.
    """
    return snap[1][-1]


def stuck_three(snap):
    """The three bottom-most tabs of a snapshot (fewer if the group is small)."""
    return snap[1][-3:]


if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("source_dir")
    parser.add_argument('group')
    parser.add_argument("out_image")
    args = parser.parse_args()

    jsons = load_dir(args.source_dir)
    group = map(lambda s: just_group(s, args.group), jsons)
    group = map(just_url, group)
    group = list(group)

    days = count_days(group)
    print('days tracked:', days)
    print('most tabs:', most_tabs(group))

    #i = draw(group)
    #i.save(args.out_image + '.png')

    seen = first_seen(group)
    i = draw_layers(group, seen)
    i.save(args.out_image + '.png')
    i = draw_layers(group, seen, hashcolor)
    i.save(args.out_image + '_contrast.png')

    by_domains = list(map(domains, group))
    stucks = map(stuck_one, by_domains)
    cts = Counter(stucks)
    print('most stuck:', cts.most_common()[:5])

    stucks = flat_map(stuck_three, by_domains)
    cts = Counter(stucks)
    print('most stuck 3:', cts.most_common()[:5])

    print('unique urls:', len(seen))
    all_doms = find_domains(by_domains)
    print('unique domains', len(all_doms))

    i = draw_hash(by_domains)
    i.save(args.out_image + 'dom.png')